Great! At this point, I’ve run a Matlab script on the children’s raw data to collect XY gaze data for each video/trial they viewed. We start with 68 children. Here they are.
# Libraries
library(tidyverse)
library(feather)
library(lme4)
library(grid)
library(png)
library(lmerTest)package ‘lmerTest’ was built under R version 3.4.3
#library(cowplot)
# Import data (and fix one participant name, and fix Owen as EnglishExposed)
data <- read_feather("../Child Data/childxydata.feather")
# Get ages
ages <- read_csv("childrenages.csv")
data <- data %>% left_join(ages, by = "participant")
data %>% select(participant,language,age) %>% distinct() # print data tableI excluded all kids that were not included in the AOI analysis. Here is a list of all of ’em!
# Load included babies and children lists
included_babies <- read_feather("cleanedbabyeyedata.feather") %>%
select(participant) %>%
distinct()
included_children <- read_feather("cleanedchildeyedata.feather") %>%
select(participant) %>%
distinct()
included <- rbind(included_babies, included_children)
# Use antijoin to see excluded kids
excluded <- anti_join(data, included, by = "participant") %>%
select(participant, language, age) %>%
distinct()
# Print table
excluded# Remove excluded kids from main dataset
data <- semi_join(data, included, by = "participant")Let’s see a histogram of ages! After this I’ll add “baby” and “child” variables so all < 2.0 are identified as babies.
# Histogram of ages
data %>% select(participant,language,age) %>%
distinct() %>%
ggplot(aes(x = age)) + geom_histogram(fill = "royalblue", binwidth = 0.25) + ggtitle("Ages in Full Dataset")# Add baby/child agegroup column
data <- data %>%
mutate(agegroup = age < 2.0)
data$agegroup <- as.factor(data$agegroup)
data$agegroup <- fct_recode(data$agegroup, baby = "TRUE", child = "FALSE")And our participant table.
participants_b <- data %>%
filter(agegroup=="baby") %>%
select(participant, gender, language, age) %>%
distinct()
participants_b_n <- participants_b %>%
count(gender, language) %>%
spread(gender, n)
participants_b_age <- participants_b %>%
group_by(language) %>%
summarise(age_m = round(mean(age), 1),
age_sd = round(sd(age), 1),
age_min = range(age)[1],
age_max = range(age)[2]) %>%
mutate(age_range = paste(age_min, age_max, sep = " - ")) %>%
select(-age_min, -age_max) %>%
mutate(age_mean = paste(age_m, age_sd, sep = "±")) %>%
select(-age_m, -age_sd) %>%
select(language, age_mean, age_range)
participants_table_b <- left_join(participants_b_n, participants_b_age, by = "language") %>%
add_column(agegroup = "baby")
participants_c <- data %>%
filter(agegroup=="child") %>%
select(participant, gender, language, age) %>%
distinct()
participants_c_n <- participants_c %>%
count(gender, language) %>%
spread(gender, n)
participants_c_age <- participants_c %>%
group_by(language) %>%
summarise(age_m = round(mean(age), 1),
age_sd = round(sd(age), 1),
age_min = range(age)[1],
age_max = range(age)[2]) %>%
mutate(age_range = paste(age_min, age_max, sep = " - ")) %>%
select(-age_min, -age_max) %>%
mutate(age_mean = paste(age_m, age_sd, sep = "±")) %>%
select(-age_m, -age_sd) %>%
select(language, age_mean, age_range)
participants_table_c <- left_join(participants_c_n, participants_c_age, by = "language") %>%
add_column(agegroup = "child")
rbind(participants_table_b, participants_table_c) %>%
select(language, agegroup, Female, Male, age_mean, age_range)Great. Let’s save this as `cleanedchildxydata.csv’.
# Pull apart condition columns
data <- data %>%
separate(condition, into = c("story", "clipnum", "direction", "media"), sep = "_") %>%
unite(story, clipnum, col = "story", sep = "_") %>%
select(-media)
# A bit more cleaning up
data <- data %>%
mutate(direction = case_when(
direction == "FW" ~ "forward",
direction == "ER" ~ "reversed"
)) %>%
mutate(language = case_when(
language == "SignLanguageExposed" ~ "SE",
language == "EnglishExposed" ~ "NSE"
)) %>%
mutate(group = as.factor(group),
gender = as.factor(gender),
language = as.factor(language),
story = as.factor(story),
direction = as.factor(direction))
# Save as csv and feather (feather preserves column types for R)
write_csv(data,"../Child Data/cleanedchildxydata.csv")
write_feather(data,"../Child Data/cleanedchildxydata.feather")Do we need to do any other cleanup? I don’t think so.
First, let’s trim each participant’s data, getting rid of the first 60 samples (0.5 secs). Then we’ll get the the mean x and y coordinate for each story for each participant.
# Just to load data again
data <- read_feather("../Child Data/cleanedchildxydata.feather")
data <- data %>%
group_by(participant,trial) %>%
slice(60:n())
data_central_tendencies <- data %>%
group_by(language, agegroup, participant, trial) %>%
summarise(mean_x = mean(x,na.rm=TRUE),
mean_y = mean(y,na.rm=TRUE),
median_x = median(x, na.rm=TRUE),
median_y = median(y, na.rm=TRUE),
diff_x = mean_x - median_x,
diff_y = mean_y - median_y)
means <- data_central_tendencies %>%
group_by(language, agegroup, participant) %>%
summarise(mean_x = mean(mean_x, na.rm = TRUE),
mean_y = mean(mean_y, na.rm = TRUE)) %>%
group_by(language, agegroup) %>%
summarise(sd_x = sd(mean_x),
sd_y = sd(mean_y),
n = n(),
mean_x = mean(mean_x),
mean_y = mean(mean_y)*-1,
se_x = sd_x/sqrt(n),
se_y = sd_y/sqrt(n))
meansmeans_error <- means %>%
select(-n, -sd_x, -sd_y) %>%
gather(measure, value, mean_x:se_y) %>%
separate(measure, into = c("measure","axis")) %>%
spread(measure, value)
means_error %>%
filter(axis == "x") %>%
ggplot(aes(x = agegroup, y = mean, color = language, group = language)) +
geom_point(position = position_dodge(width = 0.4)) +
geom_errorbar(aes(ymin = mean-se, ymax = mean+se),
position = position_dodge(width = 0.4), width = 0.25, size = 0.5) +
scale_y_continuous(limits = c(0,1080)) +
coord_flip() +
labs(y = "mean along x axis", title = "X-Axis Means")means_error %>%
filter(axis == "y") %>%
ggplot(aes(x = agegroup, y = mean, color = language, group = language)) +
geom_point(position = position_dodge(width = 0.4)) +
geom_errorbar(aes(ymin = mean-se, ymax = mean+se),
position = position_dodge(width = 0.4), width = 0.25, size = 0.5) +
scale_y_continuous(limits = c(-720,0)) +
labs(y = "mean along y axis", title = "Y-Axis Means")But is the y-value distribution unimodal, bimodal, normal, what? Do the means represent the only peak? Let’s get histograms.
Maybe the mixture of stories and directions throws off the histograms. Let’s break it down by “mark” which is an unique number I assigned to each story/direction.
Still seems mostly unimodal (that means one peak, right?).
But is the data skewed? I’ve been wondering if we should be using MEDIANS because there can be some extreme x and y values. But Rain said there’s been criticism of using medians and that means are better overall. Let’s have a look.
The first chart shows the difference between the mean and the median for each participant and trial. Positive means the mean is bigger than the median, negative means the median is bigger. It shows there is some skew for the y-axis…but the vast majority of differences is less than 50 px so maybe it’s okay.
The second chart shows the means and medians themselves. And the spread is pretty similar between mean and median so maybe using means is fine.
data_central_tendencies %>%
gather(measure, value, diff_x:diff_y) %>%
ggplot(aes(x = value)) + geom_histogram() + facet_grid(. ~ measure)data_central_tendencies %>%
gather(measure, value, mean_x:median_y) %>%
separate(measure, into = c("measure","axis")) %>%
ggplot(aes(x = value)) + geom_histogram() + facet_grid(measure ~ axis)Let’s run a LMM on the means. First, x means for babies.
means <- data %>%
group_by(language, agegroup, participant, age, story, direction, trial, repetition) %>%
summarise(x = mean(x, na.rm = TRUE),
y = mean(y, na.rm = TRUE))
means$repetition = as.factor(means$repetition)
means$trial = as.factor(means$trial)
lmm_baby_mean_x <- lmer(x ~ language * direction + age +
(1|story) + (1|participant) + (1|repetition) + (1|trial),
data = filter(means, agegroup == "baby"))
summary(lmm_baby_mean_x)Linear mixed model fit by REML t-tests use Satterthwaite approximations to degrees of freedom [lmerMod]
Formula: x ~ language * direction + age + (1 | story) + (1 | participant) +
(1 | repetition) + (1 | trial)
Data: filter(means, agegroup == "baby")
REML criterion at convergence: 3239.3
Scaled residuals:
Min 1Q Median 3Q Max
-4.3904 -0.4957 0.0510 0.4975 3.3887
Random effects:
Groups Name Variance Std.Dev.
participant (Intercept) 8.647e+02 2.941e+01
trial (Intercept) 2.209e+01 4.700e+00
story (Intercept) 6.173e+01 7.857e+00
repetition (Intercept) 1.877e-13 4.333e-07
Residual 7.927e+02 2.816e+01
Number of obs: 336, groups: participant, 22; trial, 16; story, 8; repetition, 2
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 535.1205 20.6764 20.2000 25.881 <2e-16 ***
languageSE -9.1352 15.5497 21.0400 -0.587 0.563
directionreversed 2.2633 4.0361 62.9800 0.561 0.577
age -3.2071 27.4906 19.0400 -0.117 0.908
languageSE:directionreversed 0.1286 6.8950 295.0900 0.019 0.985
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) lnggSE drctnr age
languageSE 0.075
dirctnrvrsd -0.092 0.103
age -0.914 -0.287 -0.003
lnggSE:drct 0.045 -0.221 -0.481 0.000
Y means for babies
lmm_baby_mean_y <- lmer(y ~ language * direction + age +
(1|story) + (1|participant) + (1|repetition) + (1|trial),
data = filter(means, agegroup == "baby"))
summary(lmm_baby_mean_y)Linear mixed model fit by REML t-tests use Satterthwaite approximations to degrees of freedom [lmerMod]
Formula: y ~ language * direction + age + (1 | story) + (1 | participant) +
(1 | repetition) + (1 | trial)
Data: filter(means, agegroup == "baby")
REML criterion at convergence: 3506.1
Scaled residuals:
Min 1Q Median 3Q Max
-4.6822 -0.5071 -0.0906 0.4159 7.4086
Random effects:
Groups Name Variance Std.Dev.
participant (Intercept) 897.75 29.962
trial (Intercept) 0.00 0.000
story (Intercept) 137.43 11.723
repetition (Intercept) 25.42 5.041
Residual 1880.38 43.363
Number of obs: 336, groups: participant, 22; trial, 16; story, 8; repetition, 2
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 350.337 22.336 21.430 15.685 3.18e-13 ***
languageSE -25.177 16.854 23.270 -1.494 0.149
directionreversed -5.409 5.636 308.220 -0.960 0.338
age -14.918 29.005 18.910 -0.514 0.613
languageSE:directionreversed -0.106 10.616 305.340 -0.010 0.992
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) lnggSE drctnr age
languageSE 0.060
dirctnrvrsd -0.117 0.162
age -0.893 -0.280 -0.005
lnggSE:drct 0.064 -0.314 -0.531 0.001
X means for children
lmm_child_mean_x <- lmer(x ~ language * direction + age +
(1|story) + (1|participant) + (1|repetition) + (1|trial),
data = filter(means, agegroup == "child"))
summary(lmm_child_mean_x)Linear mixed model fit by REML t-tests use Satterthwaite approximations to degrees of freedom [lmerMod]
Formula: x ~ language * direction + age + (1 | story) + (1 | participant) +
(1 | repetition) + (1 | trial)
Data: filter(means, agegroup == "child")
REML criterion at convergence: 4274.2
Scaled residuals:
Min 1Q Median 3Q Max
-6.4290 -0.4704 -0.0121 0.4661 4.1860
Random effects:
Groups Name Variance Std.Dev.
participant (Intercept) 242.043 15.558
trial (Intercept) 0.000 0.000
story (Intercept) 34.018 5.832
repetition (Intercept) 5.778 2.404
Residual 551.029 23.474
Number of obs: 462, groups: participant, 30; trial, 16; story, 8; repetition, 2
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 514.35059 12.48227 29.70000 41.206 <2e-16 ***
languageSE 3.60560 6.50348 34.80000 0.554 0.583
directionreversed 0.07379 3.22374 427.50000 0.023 0.982
age 1.99964 2.26619 26.80000 0.882 0.385
languageSE:directionreversed 1.76533 4.50536 429.50000 0.392 0.695
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) lnggSE drctnr age
languageSE -0.207
dirctnrvrsd -0.128 0.254
age -0.905 -0.060 -0.002
lnggSE:drct 0.090 -0.347 -0.725 0.005
Y means for children
lmm_child_mean_y <- lmer(y ~ language * direction + age +
(1|story) + (1|participant) + (1|repetition) + (1|trial),
data = filter(means, agegroup == "child"))
summary(lmm_child_mean_y)Linear mixed model fit by REML t-tests use Satterthwaite approximations to degrees of freedom [lmerMod]
Formula: y ~ language * direction + age + (1 | story) + (1 | participant) +
(1 | repetition) + (1 | trial)
Data: filter(means, agegroup == "child")
REML criterion at convergence: 4972.8
Scaled residuals:
Min 1Q Median 3Q Max
-4.9513 -0.4005 -0.0926 0.2908 5.6715
Random effects:
Groups Name Variance Std.Dev.
participant (Intercept) 790.91 28.123
trial (Intercept) 0.00 0.000
story (Intercept) 186.46 13.655
repetition (Intercept) 69.01 8.308
Residual 2578.64 50.780
Number of obs: 462, groups: participant, 30; trial, 16; story, 8; repetition, 2
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 302.4237 23.9463 29.2000 12.629 2.31e-13 ***
languageSE -16.5870 12.3379 37.6000 -1.344 0.187
directionreversed 6.2722 6.9835 428.5000 0.898 0.370
age 0.9306 4.2077 26.7000 0.221 0.827
languageSE:directionreversed 1.3769 9.7551 429.5000 0.141 0.888
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) lnggSE drctnr age
languageSE -0.206
dirctnrvrsd -0.145 0.290
age -0.875 -0.060 -0.003
lnggSE:drct 0.102 -0.396 -0.725 0.006
Let’s try it with both kids and babies.
lmm_all_mean_x <- lmer(x ~ language * direction * agegroup +
(1|story) + (1|participant) + (1|repetition) + (1|trial),
data = means)
summary(lmm_all_mean_x)Linear mixed model fit by REML t-tests use Satterthwaite approximations to degrees of freedom [lmerMod]
Formula: x ~ language * direction * agegroup + (1 | story) + (1 | participant) +
(1 | repetition) + (1 | trial)
Data: means
REML criterion at convergence: 7547.2
Scaled residuals:
Min 1Q Median 3Q Max
-5.8393 -0.4558 -0.0024 0.4719 4.2147
Random effects:
Groups Name Variance Std.Dev.
participant (Intercept) 480.143 21.912
trial (Intercept) 13.005 3.606
story (Intercept) 38.058 6.169
repetition (Intercept) 4.629 2.152
Residual 658.194 25.655
Number of obs: 798, groups: participant, 52; trial, 16; story, 8; repetition, 2
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 523.9854 6.7976 48.1000 77.084 <2e-16 ***
languageSE 4.6003 8.6996 56.6000 0.529 0.599
directionreversed 0.7118 3.6930 250.9000 0.193 0.847
agegroupbaby 9.3634 8.5519 56.3000 1.095 0.278
languageSE:directionreversed 0.5613 4.8729 730.5000 0.115 0.908
languageSE:agegroupbaby -14.4961 14.3263 56.3000 -1.012 0.316
directionreversed:agegroupbaby 0.7472 4.7488 726.5000 0.157 0.875
languageSE:directionreversed:agegroupbaby -0.1069 7.9089 725.2000 -0.014 0.989
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) lnggSE drctnr aggrpb lnggSE:d lnggSE:g drctn:
languageSE -0.642
dirctnrvrsd -0.274 0.191
agegroupbby -0.652 0.509 0.189
lnggSE:drct 0.185 -0.280 -0.674 -0.145
lnggSE:ggrp 0.389 -0.607 -0.114 -0.597 0.169
drctnrvrsd: 0.185 -0.145 -0.673 -0.275 0.512 0.165
lnggSE:drc: -0.112 0.171 0.407 0.166 -0.609 -0.276 -0.602
lmm_all_mean_y <- lmer(y ~ language * direction * agegroup +
(1|story) + (1|participant) + (1|repetition) + (1|trial),
data = means)
summary(lmm_all_mean_y)Linear mixed model fit by REML t-tests use Satterthwaite approximations to degrees of freedom [lmerMod]
Formula: y ~ language * direction * agegroup + (1 | story) + (1 | participant) +
(1 | repetition) + (1 | trial)
Data: means
REML criterion at convergence: 8494.8
Scaled residuals:
Min 1Q Median 3Q Max
-5.2981 -0.4472 -0.1091 0.3394 6.6748
Random effects:
Groups Name Variance Std.Dev.
participant (Intercept) 8.018e+02 2.832e+01
trial (Intercept) 4.055e-12 2.014e-06
story (Intercept) 1.611e+02 1.269e+01
repetition (Intercept) 5.352e+01 7.316e+00
Residual 2.288e+03 4.783e+01
Number of obs: 798, groups: participant, 52; trial, 16; story, 8; repetition, 2
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 307.421 10.996 14.100 27.957 9.48e-14 ***
languageSE -16.943 12.142 64.400 -1.395 0.16770
directionreversed 5.573 6.470 742.000 0.861 0.38929
agegroupbaby 32.821 11.924 63.900 2.752 0.00769 **
languageSE:directionreversed 2.174 9.085 740.900 0.239 0.81099
languageSE:agegroupbaby -10.510 19.976 63.900 -0.526 0.60062
directionreversed:agegroupbaby -11.190 8.852 737.500 -1.264 0.20656
languageSE:directionreversed:agegroupbaby -2.598 14.743 736.100 -0.176 0.86019
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) lnggSE drctnr aggrpb lnggSE:d lnggSE:g drctn:
languageSE -0.555
dirctnrvrsd -0.297 0.271
agegroupbby -0.563 0.511 0.270
lnggSE:drct 0.213 -0.374 -0.718 -0.193
lnggSE:ggrp 0.337 -0.607 -0.162 -0.597 0.225
drctnrvrsd: 0.213 -0.193 -0.715 -0.368 0.512 0.220
lnggSE:drc: -0.129 0.228 0.434 0.222 -0.609 -0.369 -0.602
No difference in the mean looking position for x or y in children or babies! But if we put children and babies in the same dataset we get a significant main effect of children vs. babies. Okay.
And I can get x or y plots of one participant across 8 stories. Let’s do Emmet. We’ll set the x and y limits to the whole width of the Tobii monitor (1600x1200…or is it now 1080x720). But because Tobii considers (0,0) to be the upper left corner (and not the bottom left corner), we also need to flip the y axis.
emmet <- filter(data,participant=="emmet_12_10_12_CODA") %>% mutate(y = y*-1)
ggplot(emmet,aes(x=x,y=y,color=story)) + geom_point(size=0.1) + geom_path() + facet_grid(repetition ~ story) + guides(color="none") + scale_x_continuous(limit=c(0,1080)) + scale_y_continuous(limit=c(-720,0))Cool, yeah?
To measure viewing space, we can use standard deviation or IQR. Generally, if we’re using means, we should use standard deviations. If we’re using medians, we should use IQR. That’s my thinking, anyway.
We’ll try SDs first. Let’s try the first SD, which is the middle 68% of the data.
sd <- data %>%
group_by(participant, trial) %>%
summarise(mean_x = mean(x, na.rm = TRUE),
mean_y = mean(y, na.rm = TRUE),
sd_x = sd(x, na.rm = TRUE),
sd_y = sd(y, na.rm = TRUE)) %>%
ungroup()
head(sd,10)# join participant info back
participantinfo <- data %>%
select(participant, trial, age, group, agegroup, gender, language, story, direction, mark, repetition) %>%
distinct()
sd <- left_join(sd, participantinfo, by = c("participant","trial"))And check out the histograms. I truncated the y-axis at 50 counts to better see outliers.
sd %>%
gather(axis,sd,sd_x:sd_y) %>%
ggplot(aes(x=sd,fill=axis)) + geom_histogram() + facet_grid(axis~.) +
coord_cartesian(ylim = c(0,50))So there are some really high outliers where the SD is 150 or 200 pixels in one direction (so a spread of as high as 400 pixels, which is a lot! I want to see those cases to see if they should be taken out or if we don’t need to worry about them.
It may be useful to think about getting rid of outliers. We should keep this in mind…
xoutliers <- sd %>%
arrange(desc(sd_x)) %>%
slice(1:20)
youtliers <- sd %>%
arrange(desc(sd_y)) %>%
slice(1:20)
xoutliersyoutliersFirst, does reversal and language experience have an effect on the SD? Babies, x-axis first.
sd$trial <- as.factor(sd$trial)
sd$repetition <- as.factor(sd$repetition)
sd_x_baby <- lmer(sd_x ~ direction * language + age + (1|participant) + (1|story) + (1|repetition) + (1|trial), data = filter(sd, agegroup == "baby"))
summary(sd_x_baby)Linear mixed model fit by REML t-tests use Satterthwaite approximations to degrees of freedom [lmerMod]
Formula: sd_x ~ direction * language + age + (1 | participant) + (1 |
story) + (1 | repetition) + (1 | trial)
Data: filter(sd, agegroup == "baby")
REML criterion at convergence: 3199.1
Scaled residuals:
Min 1Q Median 3Q Max
-1.9662 -0.5646 -0.1797 0.3836 5.7248
Random effects:
Groups Name Variance Std.Dev.
participant (Intercept) 1.239e+02 1.113e+01
trial (Intercept) 4.300e-12 2.074e-06
story (Intercept) 2.114e+01 4.598e+00
repetition (Intercept) 0.000e+00 0.000e+00
Residual 7.969e+02 2.823e+01
Number of obs: 336, groups: participant, 22; trial, 16; story, 8; repetition, 2
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 62.686 9.238 21.740 6.786 8.63e-07 ***
directionreversed -1.777 3.655 311.990 -0.486 0.627
languageSE -1.048 7.482 30.590 -0.140 0.890
age -20.554 12.038 19.060 -1.707 0.104
directionreversed:languageSE 6.175 6.902 307.500 0.895 0.372
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) drctnr lnggSE age
dirctnrvrsd -0.185
languageSE 0.029 0.237
age -0.896 -0.007 -0.261
drctnrvr:SE 0.100 -0.529 -0.461 0.001
That’s fine, we’re not exactly predicting changes along the x-axis. The y-axis is what we are really interested in! :)
sd_y_baby <- lmer(sd_y ~ direction * language + age + (1|participant) + (1|story) + (1|repetition) + (1|trial), data = filter(sd, agegroup == "baby"))
summary(sd_y_baby)Linear mixed model fit by REML t-tests use Satterthwaite approximations to degrees of freedom [lmerMod]
Formula: sd_y ~ direction * language + age + (1 | participant) + (1 |
story) + (1 | repetition) + (1 | trial)
Data: filter(sd, agegroup == "baby")
REML criterion at convergence: 3196.1
Scaled residuals:
Min 1Q Median 3Q Max
-2.4863 -0.6696 -0.0402 0.5499 4.1535
Random effects:
Groups Name Variance Std.Dev.
participant (Intercept) 1.971e+02 1.404e+01
trial (Intercept) 2.390e-13 4.889e-07
story (Intercept) 2.695e+01 5.191e+00
repetition (Intercept) 0.000e+00 0.000e+00
Residual 7.703e+02 2.775e+01
Number of obs: 336, groups: participant, 22; trial, 16; story, 8; repetition, 2
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 86.272 10.877 21.070 7.932 9.23e-08 ***
directionreversed 1.546 3.597 310.980 0.430 0.6677
languageSE -10.514 8.575 26.580 -1.226 0.2309
age -30.506 14.279 18.960 -2.136 0.0459 *
directionreversed:languageSE 8.096 6.789 306.990 1.193 0.2339
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) drctnr lnggSE age
dirctnrvrsd -0.154
languageSE 0.045 0.204
age -0.903 -0.006 -0.270
drctnrvr:SE 0.084 -0.530 -0.395 0.001
Now children, x-axis
sd_x_child <- lmer(sd_x ~ direction * language + age + (1|participant) + (1|story) + (1|repetition) + (1|trial), data = filter(sd, agegroup == "child"))
summary(sd_x_child)Linear mixed model fit by REML t-tests use Satterthwaite approximations to degrees of freedom [lmerMod]
Formula: sd_x ~ direction * language + age + (1 | participant) + (1 |
story) + (1 | repetition) + (1 | trial)
Data: filter(sd, agegroup == "child")
REML criterion at convergence: 4157.7
Scaled residuals:
Min 1Q Median 3Q Max
-1.5828 -0.5697 -0.2273 0.2309 6.5944
Random effects:
Groups Name Variance Std.Dev.
participant (Intercept) 3.374e+01 5.808e+00
trial (Intercept) 1.130e+01 3.362e+00
story (Intercept) 1.965e+00 1.402e+00
repetition (Intercept) 4.269e-13 6.533e-07
Residual 4.737e+02 2.177e+01
Number of obs: 461, groups: participant, 30; trial, 16; story, 8; repetition, 2
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 37.923 6.093 31.200 6.224 6.29e-07 ***
directionreversed 5.170 3.153 121.400 1.640 0.104
languageSE -2.885 3.586 57.900 -0.805 0.424
age -1.203 1.090 26.200 -1.104 0.279
directionreversed:languageSE -1.466 4.102 376.100 -0.357 0.721
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) drctnr lnggSE age
dirctnrvrsd -0.255
languageSE -0.245 0.380
age -0.889 -0.005 -0.058
drctnrvr:SE 0.163 -0.660 -0.572 0.010
And children, y-axis
sd_y_child <- lmer(sd_y ~ direction * language + age + (1|participant) + (1|story) + (1|repetition) + (1|trial), data = filter(sd, agegroup == "child"))
summary(sd_y_child)Linear mixed model fit by REML t-tests use Satterthwaite approximations to degrees of freedom [lmerMod]
Formula: sd_y ~ direction * language + age + (1 | participant) + (1 |
story) + (1 | repetition) + (1 | trial)
Data: filter(sd, agegroup == "child")
REML criterion at convergence: 4408.7
Scaled residuals:
Min 1Q Median 3Q Max
-2.0653 -0.6574 -0.2026 0.4676 4.4651
Random effects:
Groups Name Variance Std.Dev.
participant (Intercept) 195.21 13.972
trial (Intercept) 0.00 0.000
story (Intercept) 16.03 4.003
repetition (Intercept) 0.00 0.000
Residual 787.89 28.069
Number of obs: 461, groups: participant, 30; trial, 16; story, 8; repetition, 2
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 65.689 11.618 29.000 5.654 4.11e-06 ***
directionreversed 5.244 3.808 401.100 1.377 0.1693
languageSE -11.615 6.336 39.800 -1.833 0.0743 .
age -2.757 2.133 26.900 -1.292 0.2073
directionreversed:languageSE -2.820 5.341 421.500 -0.528 0.5978
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) drctnr lnggSE age
dirctnrvrsd -0.162
languageSE -0.219 0.305
age -0.914 -0.004 -0.060
drctnrvr:SE 0.112 -0.719 -0.421 0.007
And now all babies/children, x axis
sd_x_all <- lmer(sd_x ~ direction * language * agegroup + (1|participant) + (1|story) + (1|repetition) + (1|trial), data = sd)
summary(sd_x_all)Linear mixed model fit by REML t-tests use Satterthwaite approximations to degrees of freedom [lmerMod]
Formula: sd_x ~ direction * language * agegroup + (1 | participant) +
(1 | story) + (1 | repetition) + (1 | trial)
Data: sd
REML criterion at convergence: 7398.8
Scaled residuals:
Min 1Q Median 3Q Max
-2.1004 -0.5510 -0.1986 0.3067 6.6453
Random effects:
Groups Name Variance Std.Dev.
participant (Intercept) 79.076 8.892
trial (Intercept) 10.557 3.249
story (Intercept) 8.751 2.958
repetition (Intercept) 0.000 0.000
Residual 607.361 24.645
Number of obs: 797, groups: participant, 52; trial, 16; story, 8; repetition, 2
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 32.0056 3.5716 73.4000 8.961 2.12e-13 ***
directionreversed 5.0971 3.5042 282.5000 1.455 0.146901
languageSE -3.3573 4.6084 86.2000 -0.729 0.468265
agegroupbaby 16.2198 4.5209 85.1000 3.588 0.000556 ***
directionreversed:languageSE -1.0377 4.6582 729.0000 -0.223 0.823782
directionreversed:agegroupbaby -6.3725 4.5559 731.7000 -1.399 0.162315
languageSE:agegroupbaby -0.9728 7.5755 85.2000 -0.128 0.898128
directionreversed:languageSE:agegroupbaby 7.3354 7.5895 728.6000 0.967 0.334100
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) drctnr lnggSE aggrpb drc:SE drctn: lngSE:
dirctnrvrsd -0.492
languageSE -0.650 0.343
agegroupbby -0.660 0.344 0.512
drctnrvr:SE 0.333 -0.676 -0.505 -0.260
drctnrvrsd: 0.334 -0.678 -0.260 -0.498 0.511
lnggSE:ggrp 0.394 -0.207 -0.607 -0.597 0.305 0.298
drctnrv:SE: -0.202 0.409 0.307 0.300 -0.609 -0.602 -0.500
sd_y_all <- lmer(sd_y*2 ~ direction * language * agegroup + (1|participant) + (1|story) + (1|repetition) + (1|trial), data = sd)
summary(sd_y_all)Linear mixed model fit by REML t-tests use Satterthwaite approximations to degrees of freedom [lmerMod]
Formula: sd_y * 2 ~ direction * language * agegroup + (1 | participant) +
(1 | story) + (1 | repetition) + (1 | trial)
Data: sd
REML criterion at convergence: 8718.1
Scaled residuals:
Min 1Q Median 3Q Max
-2.4227 -0.6553 -0.1538 0.4938 4.5262
Random effects:
Groups Name Variance Std.Dev.
participant (Intercept) 867.34 29.451
trial (Intercept) 6.99 2.644
story (Intercept) 41.39 6.433
repetition (Intercept) 0.00 0.000
Residual 3154.01 56.161
Number of obs: 797, groups: participant, 52; trial, 16; story, 8; repetition, 2
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 103.710 9.579 68.400 10.827 2.22e-16 ***
directionreversed 10.802 7.606 270.100 1.420 0.1567
languageSE -24.096 13.082 68.700 -1.842 0.0698 .
agegroupbaby 26.902 12.850 68.100 2.094 0.0400 *
directionreversed:languageSE -5.750 10.606 727.000 -0.542 0.5879
directionreversed:agegroupbaby -7.734 10.381 731.500 -0.745 0.4565
languageSE:agegroupbaby -6.820 21.530 68.200 -0.317 0.7524
directionreversed:languageSE:agegroupbaby 21.671 17.294 728.400 1.253 0.2106
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) drctnr lnggSE aggrpb drc:SE drctn: lngSE:
dirctnrvrsd -0.399
languageSE -0.686 0.289
agegroupbby -0.697 0.289 0.510
drctnrvr:SE 0.283 -0.708 -0.405 -0.208
drctnrvrsd: 0.283 -0.709 -0.208 -0.399 0.510
lnggSE:ggrp 0.416 -0.174 -0.607 -0.597 0.244 0.239
drctnrv:SE: -0.171 0.428 0.246 0.240 -0.609 -0.601 -0.401
So there is no effect of language or direction (or interactions) on the standard deviation. Among babies, there seems to be an effect of age on y-axis viewing space such that it narrows the older the baby is (p = 0.046).
When we put babies and children together, we see age group differences for sd_x and sd_y, not surprisingly.
Now let’s get the middle 50% (aka the IQR) of x and y for each participant’s story (we’ve already trimmed the first 60 samples). That should also take care of further weird outliers. And we are defining “viewing space” as the IQR of the x and y axis.
iqr <- data %>%
group_by(participant,trial) %>%
summarise(xIQR = IQR(x,na.rm=TRUE),
yIQR = IQR(y,na.rm=TRUE),
xmed = median(x, na.rm=TRUE),
ymed = median(y, na.rm=TRUE)) %>%
ungroup()
head(iqr,10)# Join participant info back into IQR table
participantinfo <- data %>%
select(participant, trial, age, group, gender, language, story, direction, mark, repetition) %>%
distinct()
iqr <- left_join(iqr, participantinfo, by = c("participant","trial"))And check out the histograms. I truncated the y-axis at 100 to better see outliers.
iqr %>%
gather(axis,iqr,xIQR:yIQR) %>%
ggplot(aes(x=iqr,fill=axis)) + geom_histogram() + facet_grid(axis~.) +
coord_cartesian(ylim = c(0,50))So we see some outliers. Who are those? Let’s get a table of them. Review those AFTER I’ve done the cleanups of course.
xoutliers <- iqr %>%
arrange(desc(xIQR)) %>%
slice(1:10)
youtliers <- iqr %>%
arrange(desc(yIQR)) %>%
slice(1:10)Next, check the medians.
iqr %>%
gather(axis,med,xmed:ymed) %>%
ggplot(aes(x=med,fill=axis)) + geom_histogram() + facet_grid(axis~.) +
coord_cartesian(ylim = c(0,50))SO I need to review those too. After cleaning up/removing kids.
iqr.gather <- iqr %>% gather(axis,value,xIQR:ymed)
iqr.iqr <- filter(iqr.gather,axis=="xIQR" | axis=="yIQR")
iqr.med <- filter(iqr.gather,axis=="xmed" | axis=="ymed")
ggplot(iqr.iqr,aes(x=language,y=value,fill=direction)) +
geom_boxplot() + theme(axis.text.x=element_text(angle=45,hjust=1)) +
facet_grid(.~axis)And the median x and y position (this assumes all calibrations are correct):
ggplot(iqr.med,aes(x=language,y=value,fill=direction)) +
geom_boxplot() + theme(axis.text.x=element_text(angle=45,hjust=1)) +
facet_grid(.~axis)First, does reversal and language experience have an effect on X IQR? We have random intercepts for each participant and media, and a random slope adjustment for reversed for each participant.
xiqr.reversal <- lmer(xIQR ~ direction * language + (direction|participant) + (1|story), data = iqr)
summary(xiqr.reversal)$coefficients Estimate Std. Error df t value Pr(>|t|)
(Intercept) 38.708678 2.396168 47.13069 16.154407 0.00000000
directionreversed 3.358776 2.318054 621.22252 1.448964 0.14785227
languageSE -9.026396 3.726797 56.47549 -2.422025 0.01866818
directionreversed:languageSE -1.948012 3.634404 713.57954 -0.535992 0.59213108
That’s fine, we’re not exactly predicting changes along the x-axis. The y-axis is what we are really interested in! :)
yiqr.reversal <- lmer(yIQR ~ direction * language + (direction|participant) + (1|story), data = iqr)
summary(yiqr.reversal)$coefficients Estimate Std. Error df t value Pr(>|t|)
(Intercept) 63.239876 4.933487 49.68294 12.818495 0.000000000
directionreversed 9.340253 4.692411 172.59784 1.990502 0.048114103
languageSE -21.646409 7.748662 55.75654 -2.793567 0.007131564
directionreversed:languageSE -8.777486 7.367880 179.04123 -1.191318 0.235105783
I want to learn how to make rectangle plots so here we go. Using each participant’s four x and y medians and 4 x and y IQRs (one set for each story, for 4 stories). So I can get the logic and code down. Let’s assume all calibrations were correct. Here’s the chart for the whole media size of 1440x1080 (as reported in Tobii).
# In this order, we'll get a grand median by taking a participant's median across their 4 stories, than the median for forward and reverse across all participants.
medians <- iqr %>%
group_by(participant,direction) %>%
summarise(xIQR = median(xIQR,na.rm=TRUE),
yIQR = median(yIQR,na.rm=TRUE),
xmed = median(xmed,na.rm=TRUE),
ymed = median(ymed,na.rm=TRUE)) %>%
group_by(direction) %>%
summarise(xIQR = median(xIQR,na.rm=TRUE),
yIQR = median(yIQR,na.rm=TRUE),
x = median(xmed,na.rm=TRUE),
y = median(ymed,na.rm=TRUE))
medians <- medians %>%
mutate(y = y*-1,
xmin = x-(xIQR/2),
xmax = x+(xIQR/2),
ymin = y-(yIQR/2),
ymax = y+(yIQR/2))
img <- readPNG("cindy.png")
g <- rasterGrob(img, interpolate=TRUE, width=unit(1,"npc"), height=unit(1,"npc"))
ggplot(medians, aes(fill=direction,color=direction)) +
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) +
theme_minimal() + xlim(0,1440) + ylim(-1080,0)# ggplot(iqr.global, aes(fill=direction,color=direction)) +
# annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
# geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) +
# theme_minimal() + xlim(0,1440) + ylim(-1080,0) +
# geom_hline(yintercept=-1080+885) +
# geom_hline(yintercept=-1080+525) +
# annotate(geom="text", x = 300, y = -1080+555, label = "upper shoulder point") +
# annotate(geom="point", x = 535, y = -1080+525) +
# annotate(geom="text", x = 535, y = -1080+910, label = "height line") +
# annotate(geom="rect", xmin = 535, xmax = 535+365, ymin = -525-551, ymax = -1080+525, fill="maroon", color="black", alpha=0.5) +
# annotate(geom="text", x = 700, y = -900, label = "torso")Now let’s see the variation in viewing spaces for all our individuals. Should be fun.
iqr.individuals <- iqr %>%
rename(x = xmed,
y = ymed) %>%
mutate(y = y*-1,
xmin = x-(xIQR/2),
xmax = x+(xIQR/2),
ymin = y-(yIQR/2),
ymax = y+(yIQR/2))
ggplot(iqr.individuals, aes(fill=direction,color=direction)) +
annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) +
theme_minimal() + xlim(0,1440) + ylim(-1080,0) + facet_wrap("direction") +
ggtitle("with IQRs")Now let’s make Outer Limits charts which is IQR +/- 2 SDs. But I want to change that because I don’t like the idea of mixing IQRs and SDs.
# sd.individuals <- select(sd.individuals,participant,media,xsd,ysd)
# iqrsd.individuals <- left_join(iqr.individuals,sd.individuals,by=c("participant","media")) %>%
# mutate(xmin = xmin-(2*xsd),
# xmax = xmax+(2*xsd),
# ymin = ymin-(2*ysd),
# ymax = ymax+(2*ysd))
#
# ggplot(iqrsd.individuals, aes(fill=direction,color=direction)) +
# annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
# geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) +
# theme_minimal() + xlim(0,1440) + ylim(-1080,0) + facet_wrap("direction") +
# ggtitle("with SDs")# iqrsd.individuals <- iqrsd.individuals %>%
# group_by(direction) %>%
# dplyr::summarize(x = mean(x,na.rm=TRUE),
# y = mean(y,na.rm=TRUE),
# xmin = mean(xmin,na.rm=TRUE),
# ymin = mean(ymin,na.rm=TRUE),
# xmax = mean(xmax,na.rm=TRUE),
# ymax = mean(ymax,na.rm=TRUE))
# ggplot(iqrsd.individuals, aes(fill=direction,color=direction)) +
# annotation_custom(g, xmin=-Inf, xmax=Inf, ymin=-Inf, ymax=Inf) +
# geom_rect(aes(xmin=xmin,ymin=ymin,xmax=xmax,ymax=ymax),alpha=.1) +
# theme_minimal() + xlim(0,1440) + ylim(-1080,0) + facet_wrap("direction") +
# ggtitle("Average of above chart (rain's outer limits)")Just read this good post about plotting within-subject variation and saw it can really apply to this dataset. So I’m going to try out one example.
We know the y means aren’t significantly different across groups but that’s been hard to visualize on a per-subject basis (or even from an error bar chart since the error bars are so small). Let’s visualize that!